# 1. Load the raw activity data.
# NOTE(review): the working directory below is an absolute path on one
# specific machine; adjust it before running this script elsewhere.
setwd("C:/Users/imano/Downloads/repdata_data_activity")
activity <- read.csv(file = "activity.csv")
#2 Histogram of the total number of steps taken per day
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Keep only the observations that have a recorded step count.
steps <- activity[!is.na(activity$steps), ]

# Total steps per day. Base aggregate() is used instead of
# ddply(..., summarise, ...) so the result does not depend on which attached
# package (dplyr, plyr or plotly) currently provides `summarise`.
# The result keeps the same two columns as before: `date` and `sum`.
newdata <- aggregate(
  list(sum = steps$steps),
  by = list(date = steps$date),
  FUN = sum
)

hist(
  newdata$sum,
  xlab = "Steps per day",
  breaks = 53,
  main = "Histogram of the Total Number of Steps Taken per Day",
  col = "blue"
)
I calculate and report the mean and median of the total number of steps taken per day.
# Mean and median of the daily step totals.
media <- mean(newdata[["sum"]])
median <- median(newdata[["sum"]])
# table() is used here only as a way to print both values together; it
# produces a 1x1 contingency table labelled with the two values.
table(media, median)
## median
## media 10765
## 10766.1886792453 1
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Box plot of the daily step totals.
plot_ly(y = newdata[["sum"]], type = "box")

# Mean number of steps for each interval, computed from the rows that have a
# recorded step count.
stepsinterval <- aggregate(steps ~ interval, data = steps, FUN = mean)
plot(steps ~ interval, data = stepsinterval, type = "l")

# Interval with the highest mean step count.
maxsteps <- stepsinterval$interval[which.max(stepsinterval$steps)]
maxsteps
## [1] 835
Calculate and report the total number of missing values in the dataset (i.e. the total number of rows with NAs).
# Inspect the structure of the raw data.
str(activity)
## 'data.frame': 17568 obs. of 3 variables:
## $ steps : int NA NA NA NA NA NA NA NA NA NA ...
## $ date : chr "2012-10-01" "2012-10-01" "2012-10-01" "2012-10-01" ...
## $ interval: int 0 5 10 15 20 25 30 35 40 45 ...
# Number of rows that contain at least one NA in any column.
# complete.cases() checks every column, not only `steps`.
misingvalues <- sum(!complete.cases(activity))
# Print the count so it is actually reported.
misingvalues
Devise a strategy for filling in all of the missing values in the dataset. My strategy is to replace each missing value with the mean number of steps for the corresponding interval.
# Look up the mean number of steps for one or more intervals.
#
# Reads the global data frame `stepsinterval`, using its `interval` and
# `steps` columns.
#
# interval: interval identifier(s) to look up; may be a vector.
# Returns a vector with one element per requested interval. An interval that
# does not appear in `stepsinterval` yields NA.
meanstepsinterval <- function(interval) {
  # match() returns exactly one position per requested interval, so vector
  # input is handled element by element (a plain `==` would silently recycle).
  stepsinterval$steps[match(interval, stepsinterval$interval)]
}
I build a new dataset in which the missing values are replaced with the mean for the corresponding interval.
# Work on a copy so the raw `activity` data stays untouched.
activitydata <- activity

# Rows whose step count is missing and therefore needs to be filled in.
missing_steps <- is.na(activitydata$steps)

# Fill every missing step count with the value meanstepsinterval() returns
# for that row's interval. A single vectorised assignment replaces the
# row-by-row loop, and it is also safe when the data frame has zero rows.
activitydata$steps[missing_steps] <- vapply(
  activitydata$interval[missing_steps],
  meanstepsinterval,
  numeric(1)
)
The new dataset doesn't have missing values.
Make a histogram of the total number of steps taken each day, and calculate and report the mean and median total number of steps taken per day. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
# Daily step totals computed from the imputed data.
totalsteps <- aggregate(steps ~ date, data = activitydata, FUN = sum)
hist(totalsteps$steps)

# Mean and median of the daily totals after imputation.
mediasteps <- mean(totalsteps$steps)
# Use the full column name: `totalsteps$step` only worked because `$` does
# partial matching on data frames.
medianSsteps <- median(totalsteps$steps)
Create a new factor variable in the dataset with two levels – “weekday” and “weekend” indicating whether a given date is a weekday or weekend day.
# Parse the date strings into Date objects.
activitydata$date <- as.Date(activitydata$date, format = "%Y-%m-%d")

# Classify every date as "weekday" or "weekend" in one vectorised step.
# POSIXlt's `wday` field is numeric (0 = Sunday, 6 = Saturday), so the result
# does not depend on the session locale the way weekdays() names do.
# The column is stored as a factor with the two levels "weekday" and "weekend".
activitydata$day <- factor(
  ifelse(
    as.POSIXlt(activitydata$date)$wday %in% c(0L, 6L),
    "weekend",
    "weekday"
  ),
  levels = c("weekday", "weekend")
)
# Mean number of steps per interval within each day type. Bare column names
# are resolved through `data =`, so the result columns are named
# `interval`, `day` and `steps`, in that order.
stepsday <- aggregate(steps ~ interval + day, data = activitydata, FUN = mean)
I made a panel plot comparing the average activity pattern on weekdays and weekends.
# Give the aggregated columns the short names used in the plotting formula.
colnames(stepsday) <- c("interval", "day", "steps")
library(lattice)
## Warning: package 'lattice' was built under R version 4.1.2
# One line panel per value of `day`, laid out as 1 column by 2 rows.
xyplot(
  steps ~ interval | day,
  data = stepsday,
  type = "l",
  layout = c(1, 2),
  xlab = "Interval",
  ylab = "Number of steps"
)